# Load & clean the ILO data -----------------------------------------------
# Read the raw ILO extract and keep only the six columns used below, giving
# each one a short name (new name on the left, original column on the right).
df_ilo_ind <- select(
  read_csv("06_data_ILO_employment_sex_age_industry.csv"),
  country = ref_area.label,
  year = time,
  sex = sex.label,
  age = classif1.label,
  industry = classif2.label,
  employment = obs_value
)

# Restrict the data to the rows that are actually needed. A row is kept only
# if all four conditions hold:
#   * the sex label mentions "female" (case-insensitive),
#   * the age band is exactly "Age (Youth, adults): 15+",
#   * the industry label refers to ISIC Revision 4,
#   * the industry label is not one of the "Total" aggregates.
df_ilo_ind <- filter(
  df_ilo_ind,
  grepl("female", sex, ignore.case = TRUE),
  age == "Age (Youth, adults): 15+",
  grepl("Rev.4", industry, ignore.case = TRUE),
  !grepl("Total", industry, ignore.case = TRUE)
)

# The industry labels are unwieldy, so trim them down to the bare industry
# name. NOTE(review): the labels are presumed to look like
# "Economic activity (ISIC-Rev.4): A. Agriculture; forestry and fishing" --
# confirm against the raw file.
#
#   1. Drop the literal prefix "Economic activity (ISIC-Rev.4):". fixed() is
#      required here: as a regular expression the parentheses would form a
#      group and the "." would match any character, so the pattern would
#      never match the literal text.
#   2. Drop the leading section letter and its period (e.g. "A."). The
#      pattern is anchored at the start of the string so that a period
#      occurring later in an industry name is left untouched.
#   3. Trim any white space left at either end.
df_ilo_ind <- df_ilo_ind %>%
  mutate(
    industry = industry %>%
      str_remove(fixed("Economic activity (ISIC-Rev.4):")) %>%
      str_remove("^\\s*[A-Z]\\.") %>%
      str_trim()
  )

# Finally, shorten the ILO industry category names.
#
# Only labels whose text actually changes are listed (long name on the left,
# short name on the right). Every other label, and NA, passes through
# unchanged, so labels that are already short need no entry.
#
# Two keys from the earlier version of this table are intentionally absent
# because they could never take effect: "Constuction" (presumably a typo for
# "Construction", which passes through unchanged anyway) and "Education "
# (trailing space, which cannot occur once the labels have been trimmed).
industry_short_names <- c(
  "Activities of households as employers; undifferentiated goods- and services-producing activities of households for own use" =
    "Activities of household as employers",
  "Accommodation and food service activities" =
    "Accommodation and food service",
  "Activities of extraterritorial organizations and bodies" =
    "Extraterritorial organizations",
  "Administrative and support service activities" =
    "Administrative and support services",
  "Electricity; gas, steam and air conditioning supply" =
    "Electricity; gas, and steam",
  "Financial and insurance activities" =
    "Finance and insurance",
  "Human health and social work activities" =
    "Health and social work",
  "Other service activities" =
    "Other services",
  "Public administration and defence; compulsory social security" =
    "Public administration",
  "Professional, scientific and technical activities" =
    "Scientific and technical activities",
  "Wholesale and retail trade; repair of motor vehicles and motorcycles" =
    "Wholesale and retail trade",
  "Water supply; sewerage, waste management and remediation activities" =
    "Water supply; sewage, waste management"
)

df_ilo_ind <- df_ilo_ind %>%
  mutate(
    # Look names up as character: indexing with a factor would use its
    # integer codes instead of its labels.
    industry = as.character(industry),
    # Labels missing from the table come back as NA from the lookup;
    # coalesce() then falls back to the original label.
    industry = coalesce(unname(industry_short_names[industry]), industry)
  )

# Lastly, turn "industry" into a factor and move "Manufacturing" to the front
# of the level order so that it serves as the baseline category.
df_ilo_ind <- df_ilo_ind %>%
  mutate(industry = fct_relevel(as_factor(industry), "Manufacturing"))



